This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

# Install packages
# One-time setup: every install.packages() call below fetches one package the
# notebook relies on. The plain-text lines that follow each call are the
# console output recorded when the notebook was last run; they are not R code.
install.packages("twitteR")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/twitteR_1.1.9.tgz'
Content type 'application/x-gzip' length 537986 bytes (525 KB)
==================================================
downloaded 525 KB

The downloaded binary packages are in
    /var/folders/4l/sk4ctsk17s9dx3hh5jx1km9w0000gn/T//RtmpJdjgHD/downloaded_packages
install.packages("RCurl")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/RCurl_1.98-1.2.tgz'
Content type 'application/x-gzip' length 1006085 bytes (982 KB)
==================================================
downloaded 982 KB

The downloaded binary packages are in
    /var/folders/4l/sk4ctsk17s9dx3hh5jx1km9w0000gn/T//RtmpJdjgHD/downloaded_packages
# NOTE(review): the recorded output shows this call failing with
# "Updating loaded packages" -- presumably httr was already loaded in the
# session (confirm). Installing only when the package is missing would avoid it.
install.packages("httr")
Error in install.packages : Updating loaded packages
install.packages("syuzhet")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/syuzhet_1.0.4.tgz'
Content type 'application/x-gzip' length 2936471 bytes (2.8 MB)
==================================================
downloaded 2.8 MB

The downloaded binary packages are in
    /var/folders/4l/sk4ctsk17s9dx3hh5jx1km9w0000gn/T//RtmpJdjgHD/downloaded_packages
# NOTE(review): same "Updating loaded packages" failure recorded for tm.
install.packages("tm")
Error in install.packages : Updating loaded packages

Restarting R session...
# Load data ----
# Read the tweet export from the working directory into a data frame and
# preview its first rows.
# Disabled alternative that reads the same file through data.table:
#install.packages("data.table")
#tweets <- as.data.frame(fread(file="FINAL_CLEAN_DATA.csv", header=TRUE))
tweets <- read.csv("FINAL_CLEAN_DATA.csv")
head(tweets, n = 6L)
# Data cleaning ----
# Strip Twitter-specific noise from tweets$text so that only plain words are
# left for the sentiment scoring and the word clouds. Each step is a vectorised
# gsub() over the whole column, and the order of the steps matters.

# HTML-escaped ampersands; the ";" left behind goes with the punctuation below.
tweets$text <- gsub("&amp", "", tweets$text)
# Retweet / "via" markers together with the handles that follow them.
tweets$text <- gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", tweets$text)
# Any remaining @handles.
tweets$text <- gsub("@\\w+", "", tweets$text)
# Punctuation (this drops the "#" of a hashtag but keeps the tag word itself)
# and digits.
tweets$text <- gsub("[[:punct:]]", "", tweets$text)
tweets$text <- gsub("[[:digit:]]", "", tweets$text)
# URLs. This has to run AFTER punctuation removal: by then a link such as
# "https://t.co/x" has collapsed into the single token "httpstcox", which the
# pattern removes whole.
tweets$text <- gsub("http\\w+", "", tweets$text)
# Collapse every run of blanks/tabs into ONE space. Replacing the run with an
# empty string would glue the neighbouring words together ("hi  there" ->
# "hithere") and hide them from the lexicon lookup.
tweets$text <- gsub("[ \t]{2,}", " ", tweets$text)
# Trim leading and trailing whitespace.
tweets$text <- gsub("^\\s+|\\s+$", "", tweets$text)
# Drop every character that has no ASCII representation.
tweets$text <- iconv(tweets$text, "UTF-8", "ASCII", sub = "")
# Emotions for each tweet using NRC dictionary
# NOTE(review): syuzhet is already installed by the setup section at the top of
# this notebook, so reinstalling it here on every run looks redundant --
# confirm before removing.
install.packages("syuzhet")
trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.6/syuzhet_1.0.4.tgz'
Content type 'application/x-gzip' length 2936471 bytes (2.8 MB)
==================================================
downloaded 2.8 MB

The downloaded binary packages are in
    /var/folders/4l/sk4ctsk17s9dx3hh5jx1km9w0000gn/T//RtmpaH0jgU/downloaded_packages
# Score every tweet with syuzhet's NRC lookup, then total each score column
# across all tweets.
library(syuzhet)
emotions <- get_nrc_sentiment(tweets$text)
emo_bar <- colSums(emotions)
emo_sum <- data.frame(count = emo_bar, emotion = names(emo_bar))
# Re-level the category factor from the highest to the lowest total.
emo_sum$emotion <- with(
  emo_sum,
  factor(emotion, levels = emotion[order(count, decreasing = TRUE)])
)
# Visualise the NRC totals ----
# Bar chart built from emo_sum: one bar per score category, coloured by
# category, with the legend hidden and the x axis left untitled. It shows
# which categories dominate the tweets.
# (Run install.packages('plotly') once if the package is missing.)
library(plotly)
p <- layout(
  plot_ly(emo_sum, x = ~emotion, y = ~count, type = "bar", color = ~emotion),
  xaxis = list(title = ""),
  showlegend = FALSE,
  title = "Tweets - Emotions"
)
# Upload of the chart is left disabled:
#api_create(p,filename="Sentimentanalysis")

# Print the plot object so the notebook renders it.
p
# Build one text blob per emotion: all tweets whose score for that emotion is
# above zero, joined with single spaces. The result is an unnamed character
# vector of length eight, in exactly the order listed here.
wordcloud_tweet <- vapply(
  c("anger", "anticipation", "disgust", "fear",
    "joy", "sadness", "surprise", "trust"),
  function(emotion_name) {
    paste(tweets$text[emotions[[emotion_name]] > 0], collapse = " ")
  },
  character(1),
  USE.NAMES = FALSE
)
# create corpus
# Make sure tm is available before it is attached further down. It is installed
# from the configured repository, and only when missing, instead of from a
# hard-coded binary URL: the previous URL pointed at a Windows build of
# tm 0.5-10 for R 3.0 and fails with HTTP 404.
if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}
trying URL 'http://cran.r-project.org/bin/windows/contrib/3.0/tm_0.5-10.zip'
Warning in install.packages :
  cannot open URL 'http://cran.r-project.org/bin/windows/contrib/3.0/tm_0.5-10.zip': HTTP status was '404 Not Found'
Error in download.file(p, destfile, method, mode = "wb", ...) : 
  cannot open URL 'http://cran.r-project.org/bin/windows/contrib/3.0/tm_0.5-10.zip'
Error in install.packages : type == "both" cannot be used with 'repos = NULL'
# Attach tm and wrap the eight per-emotion strings of wordcloud_tweet in a
# corpus.
library(tm)
corpus = Corpus(VectorSource(wordcloud_tweet))
# remove punctuation, convert every word in lower case and remove stop words
# Each tm_map() call below applies one transformation to the corpus. The bare
# "transformation drops documents" lines are warnings recorded when the
# notebook was last run; they are not R code.
# NOTE(review): tm documents content_transformer(tolower) as the way to pass a
# non-tm function to tm_map() -- confirm whether the installed tm version
# needs it here.
corpus = tm_map(corpus, tolower)
transformation drops documents
corpus = tm_map(corpus, removePunctuation)
transformation drops documents
corpus = tm_map(corpus, removeWords, c(stopwords("english")))
transformation drops documents
# Reduce the remaining words to their stems.
corpus = tm_map(corpus, stemDocument)
transformation drops documents
# Term-document matrix ----
# Count the terms of each emotion document and convert the sparse result into
# an ordinary matrix (terms in rows, one column per document).
tdm <- as.matrix(TermDocumentMatrix(corpus))
# Keep only terms shorter than 11 characters for the labelled cloud;
# drop = FALSE keeps the result a matrix even if a single term survives.
tdmnew <- tdm[nchar(rownames(tdm)) < 11, , drop = FALSE]
# Unfiltered copy taken before the columns are relabelled; the second
# comparison cloud further down plots this object.
tdm2 <- tdm

# column name binding
# The columns follow the order in which wordcloud_tweet was assembled (anger,
# anticipation, disgust, fear, joy, sadness, surprise, trust). The labels must
# be listed in that same order, otherwise every segment of the cloud is titled
# with the wrong emotion.
library(wordcloud)
colnames(tdm) <- c("Anger", "Anticipation", "Disgust", "Fear",
                   "Joy", "Sadness", "Surprise", "Trust")
colnames(tdmnew) <- colnames(tdm)
# Comparison cloud of the filtered matrix: one colour per emotion column, the
# most frequent terms placed first.
comparison.cloud(tdmnew, random.order = FALSE,
                 colors = c("#00B2FF", "red", "#FF0099", "#6600CC",
                            "green", "orange", "blue", "brown"),
                 title.size = 1, max.words = 300, scale = c(2.5, 0.4),
                 rot.per = 0.4)

# Second comparison cloud, drawn from tdm2 (the matrix copy made before the
# emotion labels were assigned) with its own colour set.
library(wordcloud)
# Relabelling is left disabled for this cloud:
#colnames(tdm) = c('Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust')
#colnames(tdmnew) <- colnames(tdm)
comparison.cloud(
  term.matrix = tdm2,
  scale = c(2.5, 0.4),
  max.words = 300,
  random.order = FALSE,
  rot.per = 0.4,
  colors = c("#361c39", "#835c76", "#8c597a", "#6600CC",
             "#00b2b6", "#89588a", "#4c6676", "#840094"),
  title.size = 1
)

```

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCmVkaXRvcl9vcHRpb25zOiAKICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KIyBJbnN0YWxsIHBhY2thZ2VzCmluc3RhbGwucGFja2FnZXMoInR3aXR0ZVIiKQppbnN0YWxsLnBhY2thZ2VzKCJSQ3VybCIpCmluc3RhbGwucGFja2FnZXMoImh0dHIiKQppbnN0YWxsLnBhY2thZ2VzKCJzeXV6aGV0IikKaW5zdGFsbC5wYWNrYWdlcygidG0iKQppbnN0YWxsLnBhY2thZ2VzKCJ3b3JkY2xvdWQiKQoKIyBMb2FkIHRoZSByZXF1aXJlZCBsaWJyYXJpZXMKbGlicmFyeSh0d2l0dGVSKQpsaWJyYXJ5KFJDdXJsKQpsaWJyYXJ5KGh0dHIpCmxpYnJhcnkodG0pCmxpYnJhcnkod29yZGNsb3VkKQpsaWJyYXJ5KHN5dXpoZXQpCgpgYGAKCmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygiZGF0YS50YWJsZSIpCiN0d2VldHMgPC0gYXMuZGF0YS5mcmFtZShmcmVhZChmaWxlPSJGSU5BTF9DTEVBTl9EQVRBLmNzdiIsIGhlYWRlcj1UUlVFKSkKIyBMT0FEIERBVEEgCnR3ZWV0cyA8LSByZWFkLmNzdihmaWxlID0gJ0ZJTkFMX0NMRUFOX0RBVEEuY3N2JykKaGVhZCh0d2VldHMpCgoKYGBgCgpgYGB7cn0KCiMgREFUQSBDTEVBTklORyAKIyB3ZSBuZWVkIHRvIGNsZWFuIHRoZSB0d2VldHMgZm9yIGZ1cnRoZXIgYW5hbHlzaXMgCiMgcmVtb3ZlZCBoYXNodGFncywganVuayBjaGFyYWN0ZXJzLCBvdGhlciB0d2l0dGVyIGhhbmRsZXMgYW5kIFVSTHMgZnJvbSB0aGUgdGFncyB1c2luZyBnc3ViIGZ1bmN0aW9uIHNvIHdlIGhhdmUgdHdlZXRzIGZvciBmdXJ0aGVyIGFuYWx5c2lzCiMgZ3N1YihwYXR0ZXJuLCByZXBsYWNlbWVudCwgeCwgaWdub3JlLmNhc2UgPSBGQUxTRSwgcGVybCA9IEZBTFNFLCBmaXhlZCA9IEZBTFNFLCB1c2VCeXRlcyA9IEZBTFNFKSAtIHJlbW92ZWQgYWxsIHRoZSBoYXNodGFncywganVuayBjaGFyYWN0ZXJzIGFuZCBvdGhlciB1cmxzIC0gZXh0cmFuZW91cyBpbmZvcm1hdGlvbgoKdHdlZXRzJHRleHQgPSBnc3ViKCImYW1wIiwgIiIsIHR3ZWV0cyR0ZXh0KQp0d2VldHMkdGV4dCA9IGdzdWIoIiZhbXAiLCAiIiwgdHdlZXRzJHRleHQpCnR3ZWV0cyR0ZXh0ID0gZ3N1YigiKFJUfHZpYSkoKD86XFxiXFxXKkBcXHcrKSspIiwgIiIsIHR3ZWV0cyR0ZXh0KQp0d2VldHMkdGV4dCA9IGdzdWIoIkBcXHcrIiwgIiIsIHR3ZWV0cyR0ZXh0KQp0
d2VldHMkdGV4dCA9IGdzdWIoIltbOnB1bmN0Ol1dIiwgIiIsIHR3ZWV0cyR0ZXh0KQp0d2VldHMkdGV4dCA9IGdzdWIoIltbOmRpZ2l0Ol1dIiwgIiIsIHR3ZWV0cyR0ZXh0KQp0d2VldHMkdGV4dCA9IGdzdWIoImh0dHBcXHcrIiwgIiIsIHR3ZWV0cyR0ZXh0KQp0d2VldHMkdGV4dCA9IGdzdWIoIlsgXHRdezIsfSIsICIiLCB0d2VldHMkdGV4dCkKdHdlZXRzJHRleHQgPSBnc3ViKCJeXFxzK3xcXHMrJCIsICIiLCB0d2VldHMkdGV4dCkKdHdlZXRzJHRleHQgPC0gaWNvbnYodHdlZXRzJHRleHQsICJVVEYtOCIsICJBU0NJSSIsIHN1Yj0iIikKCmBgYAoKYGBge3J9CiMgRW1vdGlvbnMgZm9yIGVhY2ggdHdlZXQgdXNpbmcgTlJDIGRpY3Rpb25hcnkKCmluc3RhbGwucGFja2FnZXMoInN5dXpoZXQiKQpsaWJyYXJ5KHN5dXpoZXQpCgoKCmVtb3Rpb25zIDwtIGdldF9ucmNfc2VudGltZW50KHR3ZWV0cyR0ZXh0KQplbW9fYmFyID0gY29sU3VtcyhlbW90aW9ucykKZW1vX3N1bSA9IGRhdGEuZnJhbWUoY291bnQ9ZW1vX2JhciwgZW1vdGlvbj1uYW1lcyhlbW9fYmFyKSkKZW1vX3N1bSRlbW90aW9uID0gZmFjdG9yKGVtb19zdW0kZW1vdGlvbiwgbGV2ZWxzPWVtb19zdW0kZW1vdGlvbltvcmRlcihlbW9fc3VtJGNvdW50LCBkZWNyZWFzaW5nID0gVFJVRSldKQpgYGAKYGBge3J9CiMgVmlzdWFsaXplIHRoZSBlbW90aW9ucyBmcm9tIE5SQyBzZW50aW1lbnRzCiNpbnN0YWxsLnBhY2thZ2VzKCdwbG90bHknKQojICB3ZSB1c2VkIHRoaXMgcGFja2FnZSB0byBnZXQgc2VudGltZW50IHNjb3JlIGZvciBlYWNoIHR3ZWV0CiMgc2NvcmUgdGhlIGVtb3Rpb25zIG9uIGVhY2ggdHdlZXQgYXMgc3l1emhldCBicmVha3MgZW1vdGlvbiBpbnRvIDEwIGRpZmZlcmVudCBjYXRlZ29yaWVzLgojIHZpc3VhbGl6ZSByZXN1bHRzIHRvIHdoYXQgdHlwZSBvZiBlbW90aW9ucyBhcmUgZG9taW5hbnQgaW4gdGhlIHR3ZWV0cwpsaWJyYXJ5KHBsb3RseSkKcCA8LSBwbG90X2x5KGVtb19zdW0sIHg9fmVtb3Rpb24sIHk9fmNvdW50LCB0eXBlPSJiYXIiLCBjb2xvcj1+ZW1vdGlvbikgJT4lCiAgbGF5b3V0KHhheGlzPWxpc3QodGl0bGU9IiIpLCBzaG93bGVnZW5kPUZBTFNFLAogICAgICAgICB0aXRsZT0iVHdlZXRzIC0gRW1vdGlvbnMiKQojYXBpX2NyZWF0ZShwLGZpbGVuYW1lPSJTZW50aW1lbnRhbmFseXNpcyIpCgpwCmBgYAoKCmBgYHtyfQp3b3JkY2xvdWRfdHdlZXQgPSBjKAogIHBhc3RlKHR3ZWV0cyR0ZXh0W2Vtb3Rpb25zJGFuZ2VyID4gMF0sIGNvbGxhcHNlPSIgIiksCiAgcGFzdGUodHdlZXRzJHRleHRbZW1vdGlvbnMkYW50aWNpcGF0aW9uID4gMF0sIGNvbGxhcHNlPSIgIiksCiAgcGFzdGUodHdlZXRzJHRleHRbZW1vdGlvbnMkZGlzZ3VzdCA+IDBdLCBjb2xsYXBzZT0iICIpLAogIHBhc3RlKHR3ZWV0cyR0ZXh0W2Vtb3Rpb25zJGZlYXIgPiAwXSwgY29sbGFwc2U9IiAiKSwKICBwYXN0ZSh0d2VldHMkdGV4dFtlbW90aW9ucyRqb3kgPiAwXSwgY29sbGFw
c2U9IiAiKSwKICBwYXN0ZSh0d2VldHMkdGV4dFtlbW90aW9ucyRzYWRuZXNzID4gMF0sIGNvbGxhcHNlPSIgIiksCiAgcGFzdGUodHdlZXRzJHRleHRbZW1vdGlvbnMkc3VycHJpc2UgPiAwXSwgY29sbGFwc2U9IiAiKSwKICBwYXN0ZSh0d2VldHMkdGV4dFtlbW90aW9ucyR0cnVzdCA+IDBdLCBjb2xsYXBzZT0iICIpCikKCiMgY3JlYXRlIGNvcnB1cwppbnN0YWxsLnBhY2thZ2VzKCJodHRwOi8vY3Jhbi5yLXByb2plY3Qub3JnL2Jpbi93aW5kb3dzL2NvbnRyaWIvMy4wL3RtXzAuNS0xMC56aXAiLHJlcG9zPU5VTEwpCmxpYnJhcnkodG0pCmNvcnB1cyA9IENvcnB1cyhWZWN0b3JTb3VyY2Uod29yZGNsb3VkX3R3ZWV0KSkKCiMgcmVtb3ZlIHB1bmN0dWF0aW9uLCBjb252ZXJ0IGV2ZXJ5IHdvcmQgaW4gbG93ZXIgY2FzZSBhbmQgcmVtb3ZlIHN0b3Agd29yZHMKCmNvcnB1cyA9IHRtX21hcChjb3JwdXMsIHRvbG93ZXIpCmNvcnB1cyA9IHRtX21hcChjb3JwdXMsIHJlbW92ZVB1bmN0dWF0aW9uKQpjb3JwdXMgPSB0bV9tYXAoY29ycHVzLCByZW1vdmVXb3JkcywgYyhzdG9wd29yZHMoImVuZ2xpc2giKSkpCmNvcnB1cyA9IHRtX21hcChjb3JwdXMsIHN0ZW1Eb2N1bWVudCkKCiMgY3JlYXRlIGRvY3VtZW50IHRlcm0gbWF0cml4Cgp0ZG0gPSBUZXJtRG9jdW1lbnRNYXRyaXgoY29ycHVzKQoKIyBjb252ZXJ0IGFzIG1hdHJpeAp0ZG0gPSBhcy5tYXRyaXgodGRtKQp0ZG1uZXcgPC0gdGRtW25jaGFyKHJvd25hbWVzKHRkbSkpIDwgMTEsXQoKdGRtMiA9IGFzLm1hdHJpeCh0ZG0pCgojIGNvbHVtbiBuYW1lIGJpbmRpbmcKbGlicmFyeSh3b3JkY2xvdWQpCmNvbG5hbWVzKHRkbSkgPSBjKCdGZWFyJywgJ1RydXN0JywgJ0FudGljaXBhdGlvbicsICdTYWRuZXNzJywgJ0FuZ2VyJywgJ0pveScsICdEaXNndXN0JywgJ1N1cnByaXNlJykKY29sbmFtZXModGRtbmV3KSA8LSBjb2xuYW1lcyh0ZG0pCmNvbXBhcmlzb24uY2xvdWQodGRtbmV3LCByYW5kb20ub3JkZXI9RkFMU0UsCiAgICAgICAgICAgICAgICAgY29sb3JzID0gYygiIzAwQjJGRiIsICJyZWQiLCAiI0ZGMDA5OSIsICIjNjYwMENDIiwgImdyZWVuIiwgIm9yYW5nZSIsICJibHVlIiwgImJyb3duIiksCiAgICAgICAgICAgICAgICAgdGl0bGUuc2l6ZT0xLCBtYXgud29yZHM9MzAwLCBzY2FsZT1jKDIuNSwgMC40KSxyb3QucGVyPTAuNCkKYGBgCgpgYGB7cn0KbGlicmFyeSh3b3JkY2xvdWQpCiNjb2xuYW1lcyh0ZG0pID0gYygnQW5nZXInLCAnQW50aWNpcGF0aW9uJywgJ0Rpc2d1c3QnLCAnRmVhcicsICdKb3knLCAnU2FkbmVzcycsICdTdXJwcmlzZScsICdUcnVzdCcpCiNjb2xuYW1lcyh0ZG1uZXcpIDwtIGNvbG5hbWVzKHRkbSkKY29tcGFyaXNvbi5jbG91ZCh0ZG0yLCByYW5kb20ub3JkZXI9RkFMU0UsCiAgICAgICAgICAgICAgICAgY29sb3JzID0gYygiIzM2MWMzOSIsICIjODM1Yzc2IiwiIzhjNTk3YSIsICIjNjYwMENDIiwgIiMwMGIyYjYiLCAiIzg5NTg4YSIsICIjNGM2Njc2Iiwg
IiM4NDAwOTQiKSwKICAgICAgICAgICAgICAgICB0aXRsZS5zaXplPTEsIG1heC53b3Jkcz0zMDAsIHNjYWxlPWMoMi41LCAwLjQpLHJvdC5wZXI9MC40KQpgYGAKYGBgCgpBZGQgYSBuZXcgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpJbnNlcnQgQ2h1bmsqIGJ1dHRvbiBvbiB0aGUgdG9vbGJhciBvciBieSBwcmVzc2luZyAqQ21kK09wdGlvbitJKi4KCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ21kK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuIAoKVGhlIHByZXZpZXcgc2hvd3MgeW91IGEgcmVuZGVyZWQgSFRNTCBjb3B5IG9mIHRoZSBjb250ZW50cyBvZiB0aGUgZWRpdG9yLiBDb25zZXF1ZW50bHksIHVubGlrZSAqS25pdCosICpQcmV2aWV3KiBkb2VzIG5vdCBydW4gYW55IFIgY29kZSBjaHVua3MuIEluc3RlYWQsIHRoZSBvdXRwdXQgb2YgdGhlIGNodW5rIHdoZW4gaXQgd2FzIGxhc3QgcnVuIGluIHRoZSBlZGl0b3IgaXMgZGlzcGxheWVkLgoK